*clear
*cd "/Users/Kristina/Documents/Documents/kristina_school/Pol_Projects/PEW.2002.2007.threat"
*use "2007LatinoSurvey_final_data_publicrelease.dta"


******************************************************************
* Kristina Victor
* Date Started: February 8, 2012
* Project: Pew Hispanic Center Recode

*Edited by Brad Jones
*Date Started: February 2, 2015
******************************************************************


* Start from an empty workspace and never pause output for --more--
clear
set more off

* Using the 2007 Pew Hispanic Center Survey
cd "/Users/bsjjones/Dropbox/Project 1 Threat and Latinos/PTT Paper/PewAnalysis2015/PEW2007"
use "/Users/bsjjones/Dropbox/Project 1 Threat and Latinos/ptt paper/PewAnalysis2015/PEW2007/pew2007original.dta"

* Dropping labels to avoid confusion in labeling after recodes (currently disabled)
*label drop _all

* Survey-year constant, plus a row id assigned BEFORE any observations are
* dropped, so ids refer to row positions in the original file
gen year = 2007
gen uniqueidentifier = _n

* Drop observations for non-Latinos (qn1 codes 2 and 9)
drop if inlist(qn1, 2, 9)



****************************************************************
* 															   *
* 				Demographic Variables Recode				   *
*															   *
****************************************************************


* Gender indicator: 1 = female (qnd18==2), 0 = male (qnd18==1), missing otherwise
gen female = (qnd18 == 2) if inlist(qnd18, 1, 2)
label variable female "gender, 1=female"

* Heritage variables and birthplace variables (all derived from qn5)

* Only qn5==2 counts as US-born; codes 1, 3, 98 and 99 are all set to 0
gen USborn = qn5
recode USborn (2=1) (1 3 98 99=0)
label variable USborn "Born in US, excl Puerto Rico"

* Same as USborn, except qn5==1 (Puerto Rico) also counts as 1
gen USPRborn = qn5
recode USPRborn (1 2=1) (3 98 99=0)
label variable USPRborn "Born in US or Puerto Rico"

* Puerto Rico-born flag; every other case, including missing qn5, is 0
gen PRborn = (qn5 == 1)

*US Citizen or not
* qn9==1 -> 1; qn9 codes 2, 3, 8 and 9 -> 0. Anyone born in the US or
* Puerto Rico (USPRborn==1) is then set to citizen regardless of qn9.

gen citizen=qn9
	recode citizen (1=1)(2=0)(3=0) (8=0) (9=0)
	replace citizen=1 if USPRborn==1
	label variable citizen "1 if US citizen"


*Citizenship defined for naturalized citizens
* 1 = not born in the US/Puerto Rico and reports being a citizen (qn9==1).
*     Note that qn5>=3 also matches codes 98/99 and missing qn5.
* 0 = foreign born (qn5==3) and qn9 is 2, 8 or 9.
* In Stata & is evaluated before |, so the qn9 alternatives must be grouped;
* without the grouping every qn9==8 or qn9==9 respondent was set to 0
* whatever their birthplace.

gen naturalized=1 if qn5>=3 & qn9==1
    replace naturalized=0 if qn5==3 & inlist(qn9, 2, 8, 9)


*Citizenship defined for only those born in the United States.  This excludes
*Puerto Ricans and excludes naturalized citizens*

gen nativecitizen=citizen
    replace nativecitizen=. if qn5==1
	replace nativecitizen=. if naturalized==1
	replace nativecitizen=0 if qn9==2
	
*Constructing Generational Indicators*

*1st Generation: everyone with USborn==0. Because USborn codes qn5==1 (Puerto Rico)
*and qn5 codes 98/99 as 0, those respondents are included here as well*

gen firstgen=1 if USborn==0
    replace firstgen=0 if USborn==1


*2nd Generation: define as anyone who had at least one parent born outside of the US*

*Define Mother and Father's Place of Birth*
* Only defined for US-born respondents. qn7/qn8 code 2 is the only value
* treated as a US-born parent (0); codes 1, 3, 98 and 99 all count as foreign born (1).

gen motherFB=1 if USborn==1 & inlist(qn7, 1, 3, 98, 99)
    replace motherFB=0 if USborn==1 & qn7==2


gen fatherFB=1 if USborn==1 & inlist(qn8, 1, 3, 98, 99)
    replace fatherFB=0 if USborn==1 & qn8==2


	*Sum mother with father to determine total number of FB parents*
	* Missing whenever either parent indicator is missing
gen numberparentFB=motherFB + fatherFB

gen oneparentFB=1 if numberparentFB==1

gen bothparentFB=1 if numberparentFB==2


*The way this is coded, a 0 would be an implied 3rd+ generation Hispanic*
* Stata treats a missing value as larger than any number, so "numberparentFB>=1"
* would also be true when the parent count is missing. Match 1 and 2 explicitly
* so respondents with an unknown count stay missing instead of becoming 2nd generation.

gen secondgen=1 if inlist(numberparentFB, 1, 2) & USborn==1
    replace secondgen=0 if numberparentFB==0 & USborn==1

*Alternative coding: 2.5 generation defined as having 1 parent US born/1 foreign born*

*Coded 1 if respondent has 1 fb parent and 0 if the respondent has 2 fb parents*

gen secondpointfive=1 if secondgen==1 & numberparentFB==1
    replace secondpointfive=0 if numberparentFB==2 & USborn==1

*Third generation indicator*

gen thirdgenplus=1 if USborn==1 & secondgen==0


*1.5 generation is defined as someone who came as an adolescent*
*We can determine age and years in the country to approximate the 1.5ers*
*Years in country is defined as missing for Puerto Rican Respondents. This is
*because I am using this variable to construct a 1.5G measure*

* Years in the country (qn6); set to missing for the Puerto Rico-born (qn5==1)
* and for qn6 codes 98/99
gen yearsincountry=qn6
    replace yearsincountry=. if qn5==1
    replace yearsincountry=. if qn6==98
	replace yearsincountry=. if qn6==99
* Age of Respondent (qn50; codes 98/99 set to missing)

gen age=qn50
	recode age (99=.) (98=.)


*Approximate age of Respondent when they came to the United States excluding P.R.*
* Missing whenever age or yearsincountry is missing

gen entryage=age-yearsincountry

*Define 1.5 generation as someone who came here before adolescence, define here as 12. Definition comes from Rumbaut
*1=entered US before age 12; 0=entered US after age 12.  Thus a 0 is a "true" first generation immigrant*
* Stata treats a missing value as larger than any number, so "entryage>12" alone
* is true when entryage is missing. Without the !missing() guard every first-generation
* respondent with an unknown entry age (including all Puerto Rico-born, whose
* yearsincountry is set to missing above) was coded 0 and then generation==1.
* NOTE(review): with the guard those respondents have missing onepointfive and
* missing generation - confirm this is the intended treatment.


gen onepointfive=1 if entryage<=12 & firstgen==1 & USborn==0
    replace onepointfive=0 if entryage>12 & !missing(entryage) & firstgen==1


*Generation Indicator*
* 1 = first, 1.5 = arrived by age 12, 2 = two foreign-born parents,
* 2.5 = one foreign-born parent, 3 = third generation or later

gen generation=1 if onepointfive==0
    replace generation=1.5 if onepointfive==1
    replace generation=2 if secondpointfive==0
 	replace generation=2.5 if secondpointfive==1
	replace generation=3 if thirdgenplus==1
	

*Possible Undocumented*

*Does Respondent have a greencard?*
	
* Green card indicator: 1 if qn69==1, 0 if qn69 is 2, 8 or 9, missing otherwise
gen greencard = (qn69 == 1) if inlist(qn69, 1, 2, 8, 9)

* Government ID indicator, coded from qn70 in the same way
gen govid = (qn70 == 1) if inlist(qn70, 1, 2, 8, 9)

* Possibly undocumented = no government ID; missing when govid is missing
gen possibleundoc = (govid == 0) if inlist(govid, 0, 1)

	**************************************************
* Married

* Marital status: qn48 codes 1-6 are kept as they are; codes 8 and 9 are pooled into 7
gen married = qn48
recode married (8 9=7)
label variable married "1 Married, 2 Have partner, 3 Widowed, 4 Divorced, 5 Separated, 6 Never Been Married, 7 refused"

* Spanish dominant: 1 when primary==3, otherwise 0 (a missing primary is also 0)
gen spdominant = (primary == 3)
label variable spdominant "1 Spanish Dominant, 0 Bilingual or English"

* Primary language: straight copy of primary
gen language = primary
label variable language "1=english dom, 2=bi, 3=span dom"
	   
* Education 

* Education, 7 levels: qn51 codes 3 and 4 share level 3, codes 5-8 shift down by one, 9 is missing
gen education7 = qn51
recode education7 (4=3) (5=4) (6=5) (7=6) (8=7) (9=.)
label variable education7 "1 None or 8th, 2 HS incomplete, 3 GED, 4 HS, 5 Technical, 6 Some college, 7 college, 8 graduate"

* Education, 3 levels: qn51 codes 1-4 -> 1, 5-6 -> 2, 7-8 -> 3, 9 -> missing
gen education3 = qn51
recode education3 (1/4=1) (5 6=2) (7 8=3) (9=.)
label variable education3 "1 High school or less, 2 Some tech or college, 3 college or graduate"

* Employed: qn52 codes 1 and 2 -> 1, code 3 -> 0, codes 8 and 9 -> missing
gen employed = qn52
recode employed (1 2=1) (3=0) (8 9=.)
label variable employed "1 Employed Full or Part Time, 0 Other"
	
	
* Income with Categories

* Household income in 16 ordered brackets, assembled from three questions:
*   qn57 codes 1-6 -> brackets 1-6
*   qn58 codes 1-4 -> brackets 7-10
*   qn59 codes 1-6 -> brackets 11-16
* The questions are applied in that order, so a later question overwrites an
* earlier one if a respondent has valid codes on more than one.
gen incomecats=.
forvalues k = 1/6 {
    replace incomecats=`k' if qn57==`k'
}
forvalues k = 1/4 {
    replace incomecats=`k'+6 if qn58==`k'
}
forvalues k = 1/6 {
    replace incomecats=`k'+10 if qn59==`k'
}
	* Label typo fixed: bracket 15 is 150-200K (was "150-200L")
	label variable incomecats "1 5K, 2 5-10K, 3 10-15K, 4 15-20K, 5 20-25K, 6 25-30K, 7 30-35K, 8 35-40K, 9 40-45K, 10 45-50K, 11 50-60K, 12 60-75K, 13 75-100K, 14 100-150K, 15 150-200K, 16 200+"

*The following imputes missing data on income using impute cmd*
* The imputed values are rounded back to whole bracket numbers

set seed 45435353
impute incomecats education7 female spdominant age generation married employed, gen(incomecatsI)
replace incomecatsI=round(incomecatsI, 1)


	
* Number of adults living in the household:

* Adults in the household (qn61; codes 98/99 set to missing)
gen adults=qn61
	recode adults (98=.) (99=.)

* Has children: qn49==1 -> 1; qn49 codes 2, 8 and 9 -> 0.
* The label now states the 0/1 coding actually produced (it used to say "2=no").
gen havechildren=qn49
    recode havechildren (1=1) (2=0) (8=0) (9=0)
	label variable havechildren "1=yes, 0=no"

* Impute missing adults, then round back to a whole number
set seed 42525252
   	impute adults incomecats education7 female spdominant age generation married employed, gen(adultsI)
    replace adultsI=round(adultsI, 1)


* The region of the country where respondent resides:

gen region=sample03

* The metro status of the respondent:

gen metro=sample14


****************************************************************
* 
* 						Attitudinal Items					   *
*
****************************************************************

*How confident are you that Latino children growing up now in the US will have
*better jobs and make more money than you?*

* qn11 codes 1 and 2 are kept, codes 8/9 become the middle category 3,
* and codes 3 and 4 move to 4 and 5.
* NOTE(review): the label text for category 4 repeats "Somewhat Confident",
* which is also category 2; check the qn11 codebook wording. Only the
* "Confidnet" typo is corrected here.
gen childbetter=qn11
   recode childbetter (8 9=3) (3=4) (4=5)
   label variable childbetter "1= Very Confident, 2=Somewhat, 3=Don't Know, 4=Somewhat Confident, 5=Not At All Confident"

* Party ID

* Keep an untouched copy of the party question before qn17 itself is edited
gen corepartyquestion = qn17

* Fold qn17 code 7 into code 4 (this changes the raw variable in place)
replace qn17 = 4 if qn17 == 7

* Party ID with codes 8 and 9 pooled into category 5
gen pid2 = qn17
recode pid2 (8 9=5)
label variable pid2 "1 Rep, 2 Dem, 3 Ind, 4 Something Else, 5 Don't Know/Refused"


*This code reproduces Pew's combination code.
* Start from the qn17 answer (1 or 2); a qn18 answer of 1 or 2 overrides it
gen pidL = qn17 if inlist(qn17, 1, 2)
replace pidL = qn18 if inlist(qn18, 1, 2)

* Respondents whose qn18 is 3 or higher (this includes missing qn18) keep
* their qn17 code when that code is 3, 4, 8 or 9
foreach code in 3 4 8 9 {
    replace pidL = `code' if qn18 >= 3 & qn17 == `code'
}

   
 
 
 
 
 
* Registered to vote: qn16==1 stays 1; codes 2, 3, 8 and 9 become 0
gen register = qn16
recode register (2 3 8 9=0)

* Amount of immigrants (there is a lot of missing data here)
* qn24 codes 2 and 3 are swapped; codes 8 and 9 are set to missing
gen amount = qn24
recode amount (2=3) (3=2) (8 9=.)
label variable amount "1 Too Many, 2 Right Amount, 3 Too Few"

* Do immigrants help or hurt: qn26 codes 8 and 9 become category 3
gen helphurt = qn26
recode helphurt (8 9=3)
label variable helphurt "3 dk, 2 hurt, 1 help"



**************************************************************** 
*
* 					Discrimination Questions				   *
*
****************************************************************


* Discrimination in schools

* Discrimination in schools (qn28a), at work (qn28b) and in general (qn28c).
* Each item is reversed so a higher value means a bigger problem: raw codes
* 1 and 3 are swapped, 2 is kept, and codes 8 and 9 are set to missing.
gen discrimschools = qn28a
gen discrimwork = qn28b
gen discrimgeneral = qn28c

foreach v of varlist discrimschools discrimwork discrimgeneral {
    recode `v' (1=3) (3=1) (8 9=.)
    label variable `v' "1 Not a Problem, 2 Minor Problem, 3 Major Problem"
}


* Chained imputation of the three discrimination items. In each impute call
* the first variable is the one being filled in, the rest are predictors, and
* the completed variable is written to the name given in gen(). Order matters:
* each later step uses the completed (I-suffixed) versions produced by the
* earlier steps as predictors. Imputed values are rounded to whole numbers.

*Imputing discrimschools missing data*

set seed 42256262
 impute discrimschools discrimwork discrimgeneral generation age married education7 incomecatsI adultsI spdominant female, gen(discrimschoolsI)
 replace discrimschoolsI=round(discrimschoolsI, 1)

*Imputing discrimwork missing data*
* Uses the already completed discrimschoolsI as a predictor

set seed 42256262
 impute discrimwork discrimschoolsI  discrimgeneral generation age married education7 incomecatsI adultsI spdominant female, gen(discrimworkI)
 replace discrimworkI=round(discrimworkI, 1)

*Imputing discrimgeneral missing data*
* Uses both completed items, discrimschoolsI and discrimworkI, as predictors

set seed 42256262
 impute discrimgeneral discrimschoolsI  discrimworkI generation age married education7 incomecatsI adultsI spdominant female, gen(discrimgeneralI)
 replace discrimgeneralI=round(discrimgeneralI, 1)


* Reliability (alpha) of the three completed items; gen() stores the resulting scale
alpha discrimschoolsI discrimworkI discrimgeneralI, gen(discrimscaleI)

**************************************************************** 
*
* 			Ethnic and Racial Background Questions			   *
*
****************************************************************


* Ethnic Respect 

* Ethnic respect (qn30a), ethnic service (qn30b) and ethnic government
* service (qn30c). All three are reversed so a higher value means more often
* (1<->4, 2<->3); codes 8 and 9 are set to missing.
gen ethrespect = qn30a
gen ethservice = qn30b
gen ethgovserv = qn30c

foreach v of varlist ethrespect ethservice ethgovserv {
    recode `v' (1=4) (2=3) (3=2) (4=1) (8 9=.)
    label variable `v' "4 Very Often, 3 Fairly, 2 Once in a While, 1 Never"
}

* Chained imputation of the three ethnic-treatment items. In each impute call
* the first variable is the one being filled in and gen() names the completed
* copy. Order matters: ethgovservI is built first and then used as a predictor
* for ethrespectI, and both are used for ethserviceI. Imputed values are
* rounded to whole numbers.

*Impute ethgovserv*

set seed 52561444
impute ethgovserv discrimgeneralI discrimschoolsI discrimworkI ethrespect ethservice generation age married education7 incomecatsI adultsI spdominant female, gen (ethgovservI)
    replace ethgovservI=round(ethgovservI, 1) 

*Impute ethrespect*

set seed 353362325
impute ethrespect discrimgeneralI discrimschoolsI discrimworkI ethgovservI ethservice generation age married education7 incomecatsI adultsI spdominant female, gen (ethrespectI)
    replace ethrespectI=round(ethrespectI, 1) 

*Impute ethservice*

set seed 353362325
impute ethservice discrimgeneralI discrimschoolsI discrimworkI ethgovservI ethrespectI generation age married education7 incomecatsI adultsI spdominant female, gen (ethserviceI)
    replace ethserviceI=round(ethserviceI, 1) 


* Reliability (alpha) of the three completed items; gen() stores the resulting scale
alpha ethrespectI ethserviceI ethgovservI, gen(ethscaleI)

* Bias Question

* Bias indicator: qn29==1 -> 1; qn29 codes 2, 8 and 9 -> 0
gen bias=qn29
	recode bias (1=1) (2=0) (8=0) (9=0)
	label variable bias "1 Yes, 0  No"

* Straight copy of qn32; codes 8 and 9 are left in place
gen reasons=qn32
   label variable reasons "1=ses, 2=skin, 3=lang, 4-imm, 8,9=dkr"

/*Regardless of your own immigration or citizenship status, how much do you worry that 
	you, a family member, or a close friend could be deported?  Would you say that you 
	worry a lot, some, not much, or not at all?*/

* Reversed so a higher value means more worry: raw 1 (a lot) -> 4 ... raw 4
* (not at all) -> 1, with codes 8 and 9 set to missing. The scale therefore
* tops out at 4, and the label now says so (it used to say "5=a lot").

gen worrydeport=qn33
    recode worrydeport (1=4) (2=3) (8=.) (9=.) (3=2) (4=1)
	label variable worrydeport "1=not at all, 4=a lot"

	
	
*************************************************************
*
*   Immigration Policy Questions                            *
*************************************************************
/*43.	How closely did you follow news accounts earlier this year of the debate in Congress 
	about illegal immigration?  
qn43
	1	Very closely
	2	Somewhat closely
	3	Not too closely
	4	Not at all
	8	Don't know
	9	Refused
*/

* Attention paid to the immigration debate (qn43), reversed so that
* 4 = very closely ... 1 = not at all; codes 8 and 9 are set to missing
gen attentionpaid=qn43
    recode attentionpaid (1=4) (2=3) (3=2) (4=1) (8=.) (9=.)

/*44.	Do you think that the debate over immigration policy and the failure of Congress to 
	enact an immigration reform bill have made life more difficult for (Hispanics/Latinos) 
	living in this country, less difficult, or hasn't it had an effect?
qn44
	1	More difficult
	2	Less difficult
	3	No effect
	8	Don't know
	9	Refused*/

* 1 = more difficult, 3 = less difficult; "no effect", don't know and refused
* share the middle category 2. The label now matches this coding (it used to
* read "1=better, 3=worse", the reverse of what code 1 means).

gen betterworse=qn44
    recode betterworse (1=1) (2=3) (3=2) (8=2) (9=2)
	label variable betterworse "1=more difficult, 2=no effect/dk, 3=less difficult"

* qn45a-qn45c: code 1 -> 1, codes 2, 8 and 9 -> 0, code 3 -> missing.
* Each label is now attached to its own variable; the copy-pasted originals
* all targeted betterworse, overwriting its label and leaving these three unlabeled.
* NOTE(review): the label wording "1=more, 0=same" is carried over unchanged -
* confirm it against the qn45 question text.

gen troublejob=qn45a
    recode troublejob (1=1) (2=0) (3=.) (8=0) (9=0)
    label variable troublejob "1=more, 0=same"

gen showdocuments=qn45b
    recode showdocuments (1=1) (2=0) (3=.) (8=0) (9=0)
	label variable showdocuments "1=more, 0=same"

gen findhousing=qn45c
    recode findhousing  (1=1) (2=0) (3=.) (8=0) (9=0)
	label variable findhousing "1=more, 0=same"

* qn46a/qn46b: codes 2 and 3 are swapped, code 4 is set to missing and
* codes 8 and 9 are placed in category 3

gen traveloutside=qn46a
    recode traveloutside (1=1) (2=3) (3=2) (4=.) (8=3) (9=3)

gen useservices=qn46b
     recode useservices (1=1) (2=3) (3=2) (4=.) (8=3) (9=3)
	
	
****************************************************************
*
*				Extra Items for Imputing and Matching			*
*
****************************************************************



* Not hired or promoted due to ethnicity

* Not hired or promoted due to ethnicity: qn31 code 2 -> 3, code 8 -> 2, code 9 -> missing
gen promote = qn31
recode promote (2=3) (8=2) (9=.)
label variable promote "1 Yes, 2 Don't Know, 3 No"

* What party has more concern for Latinos?
* qn13 codes 3 and 8 -> 2, code 2 -> 3, code 9 -> missing
gen concern = qn13
recode concern (3 8=2) (2=3) (9=.)
label variable concern "1 Democrat, 2 Don't Know or No Diff, 3 Republican"

* Speaking and reading ability in Spanish (qn34, qn35) and English (qn36, qn37).
* Codes 1-4 are kept as they are; codes 8 and 9 are set to missing.
gen spkspan = qn34
gen rdspan = qn35
gen spkenglish = qn36
gen rdenglish = qn37

foreach v of varlist spkspan rdspan spkenglish rdenglish {
    recode `v' (8 9=.)
    label variable `v' "1 very well, 2 pretty well, 3 just a little, 4 not at all"
}

***********************************************************************

	
/*	
15.	Which political party do you think is doing a better job of dealing with illegal 
	immigration?
qn15
	1	Republican Party
	2	Democratic Party
	3	Both equally
	4	Neither
	8	Don't Know
	9	Refused
*/
	
* Party doing the better job on illegal immigration (qn15):
* 1 = Democratic (raw 2), 3 = Republican (raw 1), 2 = everything else.
* Raw code 4 (Neither) used to be left as a stray value 4 outside the labelled
* 1-3 scale; it now joins "both equally", don't know and refused in category 2.
gen bestposition=qn15
    recode bestposition (2=1) (3=2) (4=2) (8=2) (9=2) (1=3)
	label variable bestposition "1=D, 2=None/dk, 3=R"


/*27.	Overall, what is the effect of the growing number of undocumented or illegal 
	immigrants on (Hispanics/Latinos) living in the U.S.?  
	Would you say it's (READ LIST)?
qn27
	1	A positive development
	2	A negative development
	3	Or would you say it has had no impact one way or the other?
	8	Don't know
	9	Refused
*/

* 1 = positive, 3 = negative; no impact, don't know and refused share category 2
gen impactonhispanics=qn27
   recode impactonhispanics (1=1) (3=2) (8=2) (9=2) (2=3)
   label variable impactonhispanics "1=pos, 2=none, 3=negative"
   
   
 ***************************Region*****************************
 
 
* Birthplace indicators built from the country code qn5a.
* Pattern for each: copy qn5a, recode the member country codes to 1, then set
* every remaining value to 0 for respondents not born in the US or Puerto Rico
* (USPRborn==0). Raw code 1 is recoded to 0 in every group except South
* America, where it counts as a member, so it is never mistaken for the
* indicator value 1. Respondents with USPRborn other than 0 keep whatever
* value the recode left.

* Mexico: qn5a code 18
gen bornmexico = qn5a
recode bornmexico (18=1) (1=0)
replace bornmexico = 0 if bornmexico != 1 & USPRborn == 0

* Cuba: qn5a code 9
gen borncuba = qn5a
recode borncuba (9=1) (1=0)
replace borncuba = 0 if borncuba != 1 & USPRborn == 0

* Central America: qn5a codes 3, 8, 12, 14, 17, 19, 20
gen borncentralam = qn5a
recode borncentralam (3 8 12 14 17 19 20=1) (1=0)
replace borncentralam = 0 if borncentralam != 1 & USPRborn == 0

* Caribbean: qn5a codes 2, 10, 16, 30, 31
gen borncaribbean = qn5a
recode borncaribbean (2 10 16 30 31=1) (1=0)
replace borncaribbean = 0 if borncaribbean != 1 & USPRborn == 0

* South America: qn5a codes 1, 4, 5, 6, 7, 11, 13, 15, 21, 22, 26, 27, 28, 29
gen bornsoutham = qn5a
recode bornsoutham (1 4 5 6 7 11 13 15 21 22 26 27 28 29=1)
replace bornsoutham = 0 if bornsoutham != 1 & USPRborn == 0
	 
 
 
* Connection to a country/region: 1 if the respondent names it in qn4 OR was
* born there, 0 otherwise.
* connectmexico and connectcuba used to reset to 0 on "qn4~=..." alone, which
* wiped out everyone who qualified only through birthplace. They now follow
* the same "fill the remaining cases with 0" pattern as the other indicators.
gen connectmexico=1 if qn4==1 | bornmexico==1
    replace connectmexico=0 if connectmexico==.


gen connectcuba=1 if qn4==3 | borncuba==1
    replace connectcuba=0 if connectcuba==.

gen connectcentralam=1 if qn4==5 | qn4==6 | borncentralam==1
    replace connectcentralam=0 if connectcentralam==.


gen connectcaribbean=1 if qn4==4 | qn4==8 | borncaribbean==1
    replace connectcaribbean=0 if connectcaribbean==.


gen connectsoutham=1 if qn4==7 | bornsoutham==1
    replace connectsoutham=0 if connectsoutham==.


* Puerto Rico is based on qn4 alone; no birthplace indicator is combined here
 gen connectpr=1 if qn4==2
    replace connectpr=0 if qn4~=2
   
   ******************Race**************
   
 
* Placeholder: racesummary is created as all-missing and never filled in this file
gen racesummary=.


***************Drop Variables**********************
* Remove the raw survey items, sampling variables and weights now that the
* recoded versions exist, then save the recoded core file.

drop qnd18 qn1 qn2 qn3 qn4 qn5 qn5a qn6 qn6m qn7 qn8 qn9 qn10 qn11 qn12 qn13 qn14 qn15 qn16 qn17 qn18 qn19 qn20_01 qn20_02 qn20_03 qn20_04 qn20_05 qn20_06 qn20_07 qn20_08 qn20_09 qn20_10 qn21 qn22 qn23a qn23b qn23e qn23g qn23i qn23j qn24 qn25_01 qn25_02 qn25_03 qn26 qn27 qn28a qn28b qn28c qn29 qn30a qn30b qn30c qn31 qn32 qn33 qn34 qn35 qn36 qn37 spnspk spnrd engspk engrd qn38 qn39 qn40 qn41 qn42 qn43 qn44 qn45a qn45b qn45c qn46a qn46b qn48 qn49 qn50 qn51 qn52 qn53 qn54 qn55 income qn56 qn57 qn58 qn59 qn61 qn62 qn63 qn64 qn65 qn66 qn67 qn68 qn69 qn70 qn71 sample03 sample14 sample28 sample29  llweight coweight primary

saveold 2007core.dta, replace


 *Create data set with cases having missing data for purposes of imputation in R*
 * Only the un-imputed versions of the variables are kept, together with
 * uniqueidentifier so the imputed file can be matched back to the core file.
 
 
 keep  age bias pidL language generation metro region uniqueidentifier female citizen education7 married employed  incomecats adults discrimschools discrimwork discrimgeneral ethrespect ethservice ethgovserv
 
 
 saveold 2007coreforimputation.dta, replace 
 
 
 
clear
 
 *Export the file saved above to R to run Amelia, save the imputed file and merge it to the core file*
 * imputed2007.dta is not created by this do-file; it must already exist in
 * the working directory before the steps below are run.
 
 use imputed2007.dta
 
 * Sort on the merge key and save the sorted copy over the imputed file
 sort uniqueidentifier
 
 saveold imputed2007.dta, replace 
 
 use 2007core.dta
 
 
 * Drop the un-imputed versions so the imputed versions from imputed2007.dta
 * take their place after the merge (uniqueidentifier is kept as the key)
drop  age bias pidL language generation metro region  female citizen education7 married employed  incomecats adults discrimschools discrimwork discrimgeneral ethrespect ethservice ethgovserv
 
 
 
 
 sort uniqueidentifier
 
 * Old-style merge syntax: match the two sorted files on uniqueidentifier
 merge uniqueidentifier using imputed2007.dta
 
 drop _merge
 
 
 saveold 2007core_16.dta, replace 
 
 * Save a second copy of the merged file in the sibling PEWMERGED directory
 cd ..
 
 cd PEWMERGED
 
 sort year
 
 saveold 2007core_16.dta, replace 
 
 
 
 
**********************************************************************************
**********************************************************************************

